In [1]:
# Importing necessary libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import sklearn 
import plotly.graph_objects as go
import plotly.offline as pyo
import plotly.express as px
from plotly.subplots import make_subplots
from plotly import tools
In [2]:
df_dep=pd.read_csv("../SurveyDataset/Depression.csv")
df_anx=pd.read_csv("../SurveyDataset/Anxiety.csv")
df_str=pd.read_csv("../SurveyDataset/Stress.csv")
In [3]:
df=pd.read_csv("../SurveyDataset/CleanData.csv")
df.head()
Out[3]:
Which age group do you belong to? Sex What is your current family type? How many hours do you sleep per day? Who do you sleep with usually in the bedroom? How is your quality of sleep? Does your health limit you in doing daily physical activities? [Light activities] Does your health limit you in doing daily physical activities? [Moderate activities] Does your health limit you in doing daily physical activities? [Heavy activities] How often do you consume alcohol in a week? ... Distrust Hopelessness Unavailability Practical barriers Stigma age class Gender Depression Anxiety Stress
0 18 - 25 Male Joint 4-6 With parents Very good No problem No problem No problem I don’t drink alcohol ... 0 1 0 0 0 0 1 Mild Normal Mild
1 18 - 25 Male Nuclear 7-9 Alone Good Very much Moderately Very less I don’t drink alcohol ... 1 1 0 1 0 0 1 Extremely Severe Extremely Severe Extremely Severe
2 18 - 25 Female Nuclear 7-9 With Siblings Good Very much Very much Very much I don’t drink alcohol ... 0 1 0 1 1 0 0 Severe Extremely Severe Moderate
3 18 - 25 Female Nuclear <4 Alone Average No problem No problem No problem I don’t drink alcohol ... 1 1 1 1 1 0 0 Moderate Severe Severe
4 18 - 25 Male Joint 7-9 With parents Very good Very less Moderately Very less I don’t drink alcohol ... 1 1 1 1 1 0 1 Mild Moderate Mild

5 rows × 64 columns

In [4]:
x,y=df.shape
In [5]:
df['Which age group do you belong to?'].unique()
Out[5]:
array(['18 - 25', '25 - 35', '35 - 45', '45 - 60'], dtype=object)
In [6]:
total = x
age1 = df[df['Which age group do you belong to?'] == '18 - 25']['Which age group do you belong to?'].count()
age2 = df[df['Which age group do you belong to?'] == '25 - 35']['Which age group do you belong to?'].count()
age3 = df[df['Which age group do you belong to?'] == '35 - 45']['Which age group do you belong to?'].count()
age4 = df[df['Which age group do you belong to?'] == '45 - 60']['Which age group do you belong to?'].count()

labels =['18 - 25', '25 - 35', '35 - 45', '45 - 60']
sizes = [age1, age2, age3,age4]
colors = ['#274472', '#5885AF', '#C3E0E5','#007e79']
# Creating plot
fig = plt.figure(figsize =(10, 7))
plt.pie(sizes,colors=colors,autopct='%1.1f%%', shadow=False, startangle=140)
plt.legend(labels, loc="best")
plt.show()
In [7]:
df['Sex'].unique()
Out[7]:
array(['Male', 'Female', 'Prefer not to say'], dtype=object)
In [8]:
gender1 = df[df['Sex'] == 'Male']['Sex'].count()
gender2 = df[df['Sex'] == 'Female']['Sex'].count()
gender3 = df[df['Sex'] == 'Prefer not to say']['Sex'].count()

labels =['Male', 'Female', 'Prefer not to say']
sizes = [gender1, gender2, gender3]
colors = ['#274472', '#5885AF', '#C3E0E5']
# Creating plot
fig = plt.figure(figsize =(10, 7))
plt.pie(sizes,colors=colors,autopct='%1.1f%%', shadow=False, startangle=140)
plt.legend(labels, loc="best")
plt.show()
In [9]:
list(df['What is your current family type?'].value_counts())
Out[9]:
[129, 51, 15, 1]
In [10]:
fig = go.Figure(data=[
    go.Bar(name='family', x=df['What is your current family type?'].unique(), y=list(df['What is your current family type?'].value_counts())
           ,marker_color=colors),
])
fig.show()

Sleep

In [11]:
df['How many hours do you sleep per day?'].unique()
Out[11]:
array(['4-6', '7-9', '<4', '9+'], dtype=object)
In [12]:
# plt.figure(figsize = (100,100))
plt.rcParams["figure.figsize"] = [20.00, 15.50]
plt.rcParams["figure.autolayout"] = True
fig,axes = plt.subplots(nrows=3, ncols=3)
# fig.set_figheight(50)
# fig.set_figwidth(50)
plt.subplots_adjust(wspace = 0)
fig.suptitle('How important sleep is for mental health', fontsize = 25, y=1.08)
ques=['How many hours do you sleep per day?','Who do you sleep with usually in the bedroom?','How is your quality of sleep?']
hue=['Depression','Anxiety','Stress']
for i in range(3):
    for j in range(3):
        sns.countplot(ax=axes[i][j],x = df[ques[i]], hue = df[hue[j]],palette=sns.cubehelix_palette(start=.5, rot=-.5))

Physical Activities

In [13]:
df_count=pd.DataFrame()
In [14]:
ques=['Does your health limit you in doing daily physical activities?  [Light activities]','Does your health limit you in doing daily physical activities?  [Moderate activities]',
       'Does your health limit you in doing daily physical activities?  [Heavy activities]',]
In [15]:
light=list(df['Does your health limit you in doing daily physical activities?  [Light activities]'].value_counts())
heavy=list(df['Does your health limit you in doing daily physical activities?  [Heavy activities]'].value_counts())
mod=list(df['Does your health limit you in doing daily physical activities?  [Moderate activities]'].value_counts())
In [16]:
data={'Activity':['Light activities','Moderate Activities','Heavy Activities'],'No problem':[light[0],heavy[0],mod[0]],'Very much':[light[1],heavy[1],mod[1]],'Very less':[light[2],heavy[2],mod[2]],'Moderately':[light[3],heavy[3],mod[3]]}
In [17]:
light[0],heavy[0],mod[0]
Out[17]:
(95, 64, 81)
In [18]:
# Creates pandas DataFrame.
df_count = pd.DataFrame(data)
df_count.head()
Out[18]:
Activity No problem Very much Very less Moderately
0 Light activities 95 49 36 16
1 Moderate Activities 64 57 52 23
2 Heavy Activities 81 53 41 21
In [19]:
colors = ['#274472', '#5885AF', '#C3E0E5','#007e79']
fig = go.Figure(data=[
    go.Bar(name='No problem', x=df_count['Activity'], y=df_count['No problem'],marker_color=colors[0]),
    go.Bar(name='Very much', x=df_count['Activity'], y=df_count['Very much'],marker_color=colors[1]),
    go.Bar(name='Very less', x=df_count['Activity'], y=df_count['Very less'],marker_color=colors[2]),
    go.Bar(name='Moderately', x=df_count['Activity'], y=df_count['Moderately'],marker_color=colors[3]),
])
# Change the bar mode
fig.update_layout(barmode='group')
fig.show()
In [20]:
df.columns
Out[20]:
Index(['Which age group do you belong to?', 'Sex',
       'What is your current family type?',
       'How many hours do you sleep per day?',
       'Who do you sleep with usually in the bedroom?',
       'How is your quality of sleep?',
       'Does your health limit you in doing daily physical activities?  [Light activities]',
       'Does your health limit you in doing daily physical activities?  [Moderate activities]',
       'Does your health limit you in doing daily physical activities?  [Heavy activities]',
       'How often do you consume alcohol in a week?',
       'How often do you smoke in a day?',
       'Overall how would you rate your mental health?',
       'What do you think is the main cause of Depression/ Anxiety /Stress?',
       'Have you ever been diagnosed with a mental disorder before?',
       'Is there a history of mental disorder in your family?',
       'If you were diagnosed with Depression/ Anxiety /Stress in the past would you get treated?',
       'What do you think is the reason for people not considering getting treated for mental health related problems?',
       'Who would you go to if you were having mental health issues?', 'Q1(S)',
       'Q2(A)', 'Q3(D)', 'Q4(A)', 'Q5(D)', 'Q6(S)', 'Q7(A)', 'Q8(S)', 'Q9(A)',
       'Q10(D)', 'Q11(S)', 'Q12(S)', 'Q13(D)', 'Q14(S)', 'Q15(A)', 'Q16(D)',
       'Q17(D)', 'Q18(S)', 'Q19(A)', 'Q20(A)', 'Q21(D)', 'TIPI1', 'TIPI2',
       'TIPI3', 'TIPI4', 'TIPI5', 'TIPI6', 'TIPI7', 'TIPI8', 'TIPI9', 'TIPI10',
       'Fear and shame', 'Limited awareness', 'Lack of Knowledge',
       'Feelings of inadequacy', 'Finance', 'Distrust', 'Hopelessness',
       'Unavailability', 'Practical barriers', 'Stigma', 'age class', 'Gender',
       'Depression', 'Anxiety', 'Stress'],
      dtype='object')

What do you think is the main cause of Depression, Anxiety or Stress?

In [21]:
df['What do you think is the main cause of Depression/ Anxiety /Stress?'].unique()
Out[21]:
array(['Stressful Circumstances', 'Upbringing', 'Genetics',
       'Chemical Imbalance', 'Ageing'], dtype=object)
In [22]:
df['What do you think is the main cause of Depression/ Anxiety /Stress?'].value_counts()
Out[22]:
Stressful Circumstances    148
Upbringing                  24
Chemical Imbalance          22
Ageing                       1
Genetics                     1
Name: What do you think is the main cause of Depression/ Anxiety /Stress?, dtype: int64
In [23]:
count=list(df['What do you think is the main cause of Depression/ Anxiety /Stress?'].value_counts())
labels =['Stressful Circumstances', 'Upbringing', 'Genetics',
       'Chemical Imbalance', 'Ageing']
colors = ['#274472', '#5885AF', '#C3E0E5','#007e79','#657295']
# Creating plot
fig = plt.figure(figsize =(10, 7))
plt.pie(count,colors=colors,autopct='%1.1f%%', shadow=False, startangle=140)
plt.legend(labels, loc="best")
plt.show()
In [24]:
count=list(df['If you were diagnosed with Depression/ Anxiety /Stress in the past would you get treated?'].value_counts())
labels =['Yes', 'No', 'Doesn\'t apply']
colors = ['#274472', '#5885AF', '#C3E0E5']
# Creating plot
fig = plt.figure(figsize =(10, 7))
plt.pie(count,colors=colors,autopct='%1.1f%%', shadow=False, startangle=140)
plt.legend(labels, loc="best")
plt.show()
In [25]:
prb=df.iloc[:,49:59]
In [26]:
prb.head()
Out[26]:
Fear and shame Limited awareness Lack of Knowledge Feelings of inadequacy Finance Distrust Hopelessness Unavailability Practical barriers Stigma
0 1 0 1 1 0 0 1 0 0 0
1 1 0 1 0 1 1 1 0 1 0
2 1 1 1 1 0 0 1 0 1 1
3 1 1 1 1 1 1 1 1 1 1
4 1 1 1 1 1 1 1 1 1 1
In [27]:
x=[]
for i in prb.columns:
    x.append(df[i].sum())
In [28]:
colors = ['#274472', '#5885AF', '#C3E0E5','#007e79']
fig = go.Figure(go.Bar(
            x=x,
            y=prb.columns,
            orientation='h',marker_color=colors[0]))

fig.show()

Who would you go to if you were having mental health issues?

In [29]:
x=list(df['Who would you go to if you were having mental health issues?'].value_counts())
In [41]:
colors = ['#274472', '#5885AF', '#C3E0E5','#007e79']
fig = go.Figure(go.Bar(
            x=x,
            y=df['Who would you go to if you were having mental health issues?'].unique(),
            orientation='h',marker_color=colors[3]))

fig.show()
In [43]:
tr=df.groupby(['How often do you smoke in a day?', 'Depression']).size().reset_index(name='counts')
tr
Out[43]:
How often do you smoke in a day? Depression counts
0 1 - 5 times a day Extremely Severe 4
1 1 - 5 times a day Mild 2
2 1 - 5 times a day Moderate 2
3 1 - 5 times a day Normal 2
4 6 - 15 times a day Mild 1
5 6 - 15 times a day Normal 2
6 I don’t smoke Extremely Severe 22
7 I don’t smoke Mild 26
8 I don’t smoke Moderate 37
9 I don’t smoke Normal 57
10 I don’t smoke Severe 15
11 Occasionally Extremely Severe 6
12 Occasionally Mild 1
13 Occasionally Moderate 6
14 Occasionally Normal 10
15 Occasionally Severe 3
In [31]:
x=df['How often do you consume alcohol in a week?'].value_counts()/196*100
x=pd.DataFrame(x)
x['Values'] = x.index
In [32]:
plt.rcParams["figure.figsize"] = [7,7]
startingRadius = 0.7 + (0.3* (len(data)-1))
for index, row in x.iterrows():
    scenario = row['Values']
    percentage = row['How often do you consume alcohol in a week?']
    textLabel = scenario + '-->' + str(round(percentage, 2)) +'%'
    remainingPie = 100 - percentage

    donut_sizes = [remainingPie, percentage]

    plt.text(0.01, startingRadius + 0.07, textLabel, horizontalalignment='center', verticalalignment='center')
    plt.pie(donut_sizes, radius=startingRadius, startangle=90, colors = ['#C3E0E5','#274472'],
            wedgeprops={"edgecolor": "white", 'linewidth': 1})

    startingRadius-=0.3

# equal ensures pie chart is drawn as a circle (equal aspect ratio)
plt.axis('equal')

# create circle and place onto pie chart
circle = plt.Circle(xy=(0, 0), radius=0.35, facecolor='white')
plt.gca().add_artist(circle)
plt.show()
In [33]:
x=df['How often do you smoke in a day?'].value_counts()/196*100
x=pd.DataFrame(x)
x['Values'] = x.index
In [34]:
plt.rcParams["figure.figsize"] = [7,7]
startingRadius = 0.7 + (0.3* (len(data)-1))
for index, row in x.iterrows():
    scenario = row['Values']
    percentage = row['How often do you smoke in a day?']
    textLabel = scenario + '-->' + str(round(percentage, 2)) +'%'
    remainingPie = 100 - percentage

    donut_sizes = [remainingPie, percentage]

    plt.text(0.01, startingRadius + 0.07, textLabel, horizontalalignment='center', verticalalignment='center')
    plt.pie(donut_sizes, radius=startingRadius, startangle=90, colors = ['#C3E0E5','#274472'],
            wedgeprops={"edgecolor": "white", 'linewidth': 1})

    startingRadius-=0.3

# equal ensures pie chart is drawn as a circle (equal aspect ratio)
plt.axis('equal')

# create circle and place onto pie chart
circle = plt.Circle(xy=(0, 0), radius=0.35, facecolor='white')
plt.gca().add_artist(circle)
plt.show()
In [35]:
def scale_chart(labels,x_data,y_data):

    colors = ['#274472', '#42669e','#6d93cf','#94bbf7']
    fig = go.Figure()

    for i in range(0, len(x_data[0])):
        for xd, yd in zip(x_data, y_data):
            fig.add_trace(go.Bar(
                x=[xd[i]], y=[yd],
                orientation='h',
                marker=dict(
                    color=colors[i],
                    line=dict(color='rgb(248, 248, 249)', width=1)
                )
            ))

    fig.update_layout(
        xaxis=dict(
            showgrid=False,
            showline=False,
            showticklabels=False,
            zeroline=False,
            domain=[0.15, 1]
        ),
        yaxis=dict(
            showgrid=False,
            showline=False,
            showticklabels=False,
            zeroline=False,
        ),
        barmode='stack',
        paper_bgcolor='rgb(248, 248, 255)',
        plot_bgcolor='rgb(248, 248, 255)',
        margin=dict(l=120, r=10, t=140, b=80),
        showlegend=False,
    )

    annotations = []

    for yd, xd in zip(y_data, x_data):
        # labeling the y-axis
        annotations.append(dict(xref='paper', yref='y',
                                x=0.14, y=yd,
                                xanchor='right',
                                text=str(yd),
                                font=dict(family='Arial', size=10,
                                          color='rgb(67, 67, 67)'),
                                showarrow=False, align='right'))
        # labeling the first percentage of each bar (x_axis)
        annotations.append(dict(xref='x', yref='y',
                                x=xd[0] / 2, y=yd,
                                text=str(xd[0]) + '%',
                                font=dict(family='Arial', size=12,
                                          color='rgb(248, 248, 255)'),
                                showarrow=False))
        # labeling the first Likert scale (on the top)
        if yd == y_data[-1]:
            annotations.append(dict(xref='x', yref='paper',
                                    x=xd[0] / 2, y=1.1,
                                    text=top_labels[0],
                                    font=dict(family='Arial', size=12,
                                              color='rgb(67, 67, 67)'),
                                    showarrow=False))
        space = xd[0]
        for i in range(1, len(xd)):
                # labeling the rest of percentages for each bar (x_axis)
                annotations.append(dict(xref='x', yref='y',
                                        x=space + (xd[i]/2), y=yd,
                                        text=str(xd[i]) + '%',
                                        font=dict(family='Arial', size=12,
                                                  color='rgb(248, 248, 255)'),
                                        showarrow=False))
                # labeling the Likert scale
                if yd == y_data[-1]:
                    annotations.append(dict(xref='x', yref='paper',
                                            x=space + (xd[i]/2), y=1.1,
                                            text=top_labels[i],
                                            font=dict(family='Arial', size=12,
                                                      color='rgb(67, 67, 67)'),
                                            showarrow=False))
                space += xd[i]

    fig.update_layout(annotations=annotations)
    print("0 = Did not apply to me at all\n1 = Applied to me to some degree, or some of the time\n2 = Applied to me to a considerable degree or a good part of time\n3 = Applied to me very much or most of the time")
    fig.show()
In [36]:
top_labels = ['0','1','2','3']
In [37]:
# depression
y_data=['Were you not able to experience positive feelings?',
'Do you find it difficult to work up the initiative to do things?',
'Do you feel that you have nothing to look forward to?',
'Do you feel sad and depressed?',
'Do you find it hard to get enthusiastic about things?',
'Do you feel you weren\'t worth much as a person?',
'Do you feel that life has become meaningless?']
x_data=[]
for i in df_dep.columns:
    x_data.append(list(df[i].value_counts()/196*100))
np_array = np.array(x_data)
np_round_to_tenths = np.around(np_array, 1)
x_data = list(np_round_to_tenths)
scale_chart(top_labels,x_data,y_data)
0 = Did not apply to me at all
1 = Applied to me to some degree, or some of the time
2 = Applied to me to a considerable degree or a good part of time
3 = Applied to me very much or most of the time
In [38]:
# anxiety
y_data=['Were you aware of the dryness in your mouth?',
'Do you experience breathing difficulty (eg, excessively rapid breathing, breathlessness in the absence of physical exertion).',
'Do you experience trembling (eg, in your hands).',
'Are you worried about situations in which you might panic and make a fool of yourself?',
'Do you find yourself on the verge of panicking?',
'How aware are you of the action of your heart in the absence of any physical exertion (eg, sense of heart rate increase, heart missing a beat).',
'Do you feel scared without any good reason?']
x_data=[]
for i in df_anx.columns:
    x_data.append(list(df[i].value_counts()/196*100))
np_array = np.array(x_data)
np_round_to_tenths = np.around(np_array, 1)
x_data = list(np_round_to_tenths)
scale_chart(top_labels,x_data,y_data)
0 = Did not apply to me at all
1 = Applied to me to some degree, or some of the time
2 = Applied to me to a considerable degree or a good part of time
3 = Applied to me very much or most of the time
In [39]:
#stress
y_data=['Do you find it hard to calm down after something upset you?',
'Do you tend to over-react in situations?',
'Do you find yourself in a state of nervous tension?',
'Do you find yourself getting agitated?',
'Do you find it difficult to relax?',
'Do you find it difficult to tolerate interruptions while doing something?',
'Do you find yourself getting impatient when you are delayed in any way (eg, elevators, traffic lights, being kept waiting).']
x_data=[]
for i in df_str.columns:
    x_data.append(list(df[i].value_counts()/196*100))
np_array = np.array(x_data)
np_round_to_tenths = np.around(np_array, 1)
x_data = list(np_round_to_tenths)
scale_chart(top_labels,x_data,y_data)
0 = Did not apply to me at all
1 = Applied to me to some degree, or some of the time
2 = Applied to me to a considerable degree or a good part of time
3 = Applied to me very much or most of the time